# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import FormRequest
import json
from ..items import JuchaoItem


class CrawlSpider(scrapy.Spider):
    """Scrape 'daily trading' (category_rcjy_szsh) announcement PDFs from cninfo.com.cn.

    Flow:
      1. POST the paginated history-announcement query endpoint once per page.
      2. For each announcement in the JSON listing, request its PDF download.
      3. Yield a ``JuchaoItem`` carrying security name, code, announcement id,
         and the raw PDF bytes.
    """

    name = 'crawl'
    allowed_domains = ['cninfo.com.cn']

    # Query parameters hoisted to class attributes so the crawl window can be
    # changed (e.g. by a subclass) without touching the request-building code.
    # Defaults preserve the original behavior exactly.
    query_url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
    download_url = 'http://www.cninfo.com.cn/new/announcement/download?bulletinId=%s'
    max_page = 99                      # pages 1..99, 30 rows each (as before)
    se_date = '2019-01-01~2020-01-01'  # date window string the API accepts

    def start_requests(self):
        """Yield one paginated POST query per results page."""
        for page in range(1, self.max_page + 1):
            data = {
                "pageNum": str(page),
                "pageSize": "30",
                "column": "szse",
                "tabName": "fulltext",
                "plate": "szmb;shmb",
                "stock": "",
                "searchkey": "",
                "secid": "",
                "category": "category_rcjy_szsh",
                "trade": "",
                "seDate": self.se_date,
                "sortName": "",
                "sortType": "",
                "isHLtitle": "true",
            }
            yield FormRequest(url=self.query_url, formdata=data, callback=self.parse)

    def parse(self, response):
        """Parse the JSON announcement listing and request each PDF download."""
        content = json.loads(response.text)
        # The API returns "announcements": null for an empty page; the original
        # `content["announcements"]` then iterated None and raised TypeError.
        announcements = content.get("announcements") or []
        for ann in announcements:
            pdf_url = self.download_url % ann["announcementId"]
            yield scrapy.Request(
                url=pdf_url,
                meta={
                    'name': ann["secName"],
                    'code': ann["secCode"],
                    'PDF_id': ann["announcementId"],
                },
                # BUGFIX: callback was the misspelled `self.pares_to`; renamed
                # (defined and referenced only within this class).
                callback=self.parse_pdf,
                # PDFs share the download endpoint; bypass the dupe filter.
                dont_filter=True,
            )

    def parse_pdf(self, response):
        """Package the downloaded PDF bytes into a JuchaoItem."""
        yield JuchaoItem(
            name=response.meta['name'],
            code=response.meta['code'],
            PDF_id=response.meta['PDF_id'],
            PDF_content=response.body,
        )


